//
// Project: Disagreement in science: Missing women



clear all
version 15.1  



//
// set locals

// method of identifying gender: names of the gender indicator variables
// that the regressions below refer to through these locals
local female "female_genderize"
local male "male_genderize"

// gender of author is known: the female indicator is non-missing.
// Derived from the female local above, so that switching the
// gender-identification method keeps the sample condition in sync.
local known_gender "`female'!=."



//
// MAIN EFFECTS MODEL

//
// AER

// load the AER author-article data (the data global must be set by the caller)
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
drop if year==2020
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 1: single authorship on gender and comment indicators plus year
// dummies, robust standard errors, authors with known gender only
local i 1
// columns of r(table) whose p-values are kept; presumably the female and
// comment coefficients (assumes both indicators are 0/1) - confirm
local coefficients`i' "2 4"
regress single_authored i.`female' i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// ASR

// load the ASR author-article data (the data global must be set by the caller)
use "${data}/output/asr_data_gender.dta", clear
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 2: single authorship on gender and comment indicators plus year
// dummies, robust standard errors, authors with known gender only
local i 2
// columns of r(table) whose p-values are kept; presumably the female and
// comment coefficients (assumes both indicators are 0/1) - confirm
local coefficients`i' "2 4"
regress single_authored i.`female' i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// JAMA

// load the JAMA (PubMed) author-article data (the data global must be set by the caller)
use "${data}/output/jama_pubmed_data_gender.dta", clear
drop if year==2020
drop if year<2002  // full author names from PubMed not available
// Comment & Response section started in July 2013:
// drop all earlier years and the first six months of 2013
drop if year<2013
drop if year==2013 & inlist(month, "January", "February", "March", "April", "May", "June")
keep if comment | research_article
drop if article_with_etal
drop if strpos(full_name, "Fontanarosa")  // this JAMA editor appeared as first author of letters to the editor

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 3: single authorship on gender and comment indicators plus year
// dummies, robust standard errors, authors with known gender only
local i 3
// columns of r(table) whose p-values are kept; presumably the female and
// comment coefficients (assumes both indicators are 0/1) - confirm
local coefficients`i' "2 4"
regress single_authored i.`female' i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// Nature

// load the Nature author-article data (the data global must be set by the caller)
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 4: single authorship on gender and comment indicators plus year
// dummies, robust standard errors, authors with known gender only
local i 4
// columns of r(table) whose p-values are kept; presumably the female and
// comment coefficients (assumes both indicators are 0/1) - confirm
local coefficients`i' "2 4"
regress single_authored i.`female' i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// PNAS

// load the PNAS author-article data (the data global must be set by the caller)
use "${data}/output/pnas_data_gender.dta", clear
// name suffixes erroneously scraped as separate author-article observations
drop if inlist(full_name, "II", "III", "IV", "Jr", "Jr.")
drop if year==2020 | year<2008  // PNAS started comments in 2008
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 5: single authorship on gender and comment indicators plus year
// dummies, robust standard errors, authors with known gender only
local i 5
// columns of r(table) whose p-values are kept; presumably the female and
// comment coefficients (assumes both indicators are 0/1) - confirm
local coefficients`i' "2 4"
regress single_authored i.`female' i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// Science

// load the Science author-article data (the data global must be set by the caller)
use "${data}/output/science_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 6: single authorship on gender and comment indicators plus year
// dummies, robust standard errors, authors with known gender only
local i 6
// columns of r(table) whose p-values are kept; presumably the female and
// comment coefficients (assumes both indicators are 0/1) - confirm
local coefficients`i' "2 4"
regress single_authored i.`female' i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}




//
// INTERACTED MODEL

//
// AER

// load the AER author-article data (the data global must be set by the caller)
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
drop if year==2020
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 7: as the main-effects model, with gender fully interacted with the
// comment indicator; robust standard errors, authors with known gender only
local i 7
// columns of r(table) whose p-values are kept; presumably the female,
// comment and female-by-comment coefficients (assumes 0/1 indicators) - confirm
local coefficients`i' "2 4 8"
regress single_authored i.`female'##i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// ASR

// load the ASR author-article data (the data global must be set by the caller)
use "${data}/output/asr_data_gender.dta", clear
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 8: as the main-effects model, with gender fully interacted with the
// comment indicator; robust standard errors, authors with known gender only
local i 8
// columns of r(table) whose p-values are kept; presumably the female,
// comment and female-by-comment coefficients (assumes 0/1 indicators) - confirm
local coefficients`i' "2 4 8"
regress single_authored i.`female'##i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// JAMA

// load the JAMA (PubMed) author-article data (the data global must be set by the caller)
use "${data}/output/jama_pubmed_data_gender.dta", clear
drop if year==2020
drop if year<2002  // full author names from PubMed not available
// Comment & Response section started in July 2013:
// drop all earlier years and the first six months of 2013
drop if year<2013
drop if year==2013 & inlist(month, "January", "February", "March", "April", "May", "June")
keep if comment | research_article
drop if article_with_etal
drop if strpos(full_name, "Fontanarosa")  // this JAMA editor appeared as first author of letters to the editor

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 9: as the main-effects model, with gender fully interacted with the
// comment indicator; robust standard errors, authors with known gender only
local i 9
// columns of r(table) whose p-values are kept; presumably the female,
// comment and female-by-comment coefficients (assumes 0/1 indicators) - confirm
local coefficients`i' "2 4 8"
regress single_authored i.`female'##i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// Nature

// load the Nature author-article data (the data global must be set by the caller)
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 10: as the main-effects model, with gender fully interacted with the
// comment indicator; robust standard errors, authors with known gender only
local i 10
// columns of r(table) whose p-values are kept; presumably the female,
// comment and female-by-comment coefficients (assumes 0/1 indicators) - confirm
local coefficients`i' "2 4 8"
regress single_authored i.`female'##i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// PNAS

// load the PNAS author-article data (the data global must be set by the caller)
use "${data}/output/pnas_data_gender.dta", clear
// name suffixes erroneously scraped as separate author-article observations
drop if inlist(full_name, "II", "III", "IV", "Jr", "Jr.")
drop if year==2020 | year<2008  // PNAS started comments in 2008
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 11: as the main-effects model, with gender fully interacted with the
// comment indicator; robust standard errors, authors with known gender only
local i 11
// columns of r(table) whose p-values are kept; presumably the female,
// comment and female-by-comment coefficients (assumes 0/1 indicators) - confirm
local coefficients`i' "2 4 8"
regress single_authored i.`female'##i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// Science

// load the Science author-article data (the data global must be set by the caller)
use "${data}/output/science_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// authors per article, counted over all remaining rows
// (not necessarily with identified gender)
bysort article_id: egen number_authors = count(article_id)
generate single_authored = (number_authors == 1)

// model 12: as the main-effects model, with gender fully interacted with the
// comment indicator; robust standard errors, authors with known gender only
local i 12
// columns of r(table) whose p-values are kept; presumably the female,
// comment and female-by-comment coefficients (assumes 0/1 indicators) - confirm
local coefficients`i' "2 4 8"
regress single_authored i.`female'##i.comment i.year if `known_gender', vce(robust)
matrix define m`i' = r(table)
scalar define n`i' = e(N)
scalar define rsq`i' = e(r2)
// p-values (row 4 of r(table)), one local per selected column
foreach col of local coefficients`i' {
	local p`i'_`col' = m`i'[4, `col']
}



//
// create significance stars for regressions above (models 1-12).
// For every stored p-value a string scalar st<model>_<column> is set:
//   p < 0.001 -> four stars, p < 0.01 -> three, p < 0.05 -> two,
//   p < 0.1 -> one, anything else (including a missing p-value) -> empty.
// Thresholds are tested from smallest to largest, so each branch only
// needs the upper bound of its interval.
forvalues model = 1/12 {
	foreach col of local coefficients`model' {
		if `p`model'_`col'' < 0.001 {
			scalar st`model'_`col' = "****"
		}
		else if `p`model'_`col'' < 0.01 {
			scalar st`model'_`col' = "***"
		}
		else if `p`model'_`col'' < 0.05 {
			scalar st`model'_`col' = "**"
		}
		else if `p`model'_`col'' < 0.1 {
			scalar st`model'_`col' = "*"
		}
		else {
			scalar st`model'_`col' = ""
		}
	}
}





